# Copyright 2009 mwta committers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

""" Markup Example 11

Defines the function getLinkFromXhtml() which is later used to return the href
for a given regular expression, if it exists.

(Includes the code from markup example 07)
"""

import re
import sys
import urllib
import BeautifulSoup

from xml.dom import minidom

def getLinkFromXhtml(content, text_regex):
  """Find the first <a href=...> element whose text matches text_regex.

  Only the text nodes that are direct children of the anchor are joined
  together and searched; text inside nested elements (e.g. <b>) is ignored.
  Anchors without an href attribute are skipped.

  Args:
    content: the source content, e.g. an xHTML response. It is parsed with
      minidom.parseString, so it has to be well-formed XML.
    text_regex: regular expression searched for (re.search semantics) in the
      anchor's text.
  Returns:
    The first matching anchor element serialised as an XML string (the whole
    element, not just the value of its href), else None.
  """
  document = minidom.parseString(content)
  pattern = re.compile(text_regex)

  for anchor in document.getElementsByTagName('a'):
    if not anchor.hasAttribute('href'):
      continue
    # Concatenate the anchor's own text nodes, in document order.
    label = "".join(
        child.data for child in anchor.childNodes
        if child.nodeType == child.TEXT_NODE)
    if pattern.search(label) is not None:
      return str(anchor.toxml())

  return None


request = urllib.FancyURLopener()
request.addheader('Accept', 'application/xhtml+xml') 
request.addheader('User-Agent', 
  'Nokia6230/2.0+(04.43)+Profile/MIDP-2.0+Configuration/CLDC-1.1+UP.Link/6.3.0.0.0') 
response = request.open("http://www.google.co.uk/m")
content = response.read() 

print getLinkFromXhtml(content, 'Download Google Maps')

soup = BeautifulSoup.BeautifulSoup(content)
print soup.prettify()

# write the content to a file so it can be displayed in a browser
f = open("markup_ex04_prettified.xml", "wb")
f.write(content)
f.close()

